package com.gitee.word_count

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession

object WordCount {

  /** Counts word occurrences in a whitespace-separated text file and prints
    * each (word, count) pair to stdout.
    *
    * @param args optional: args(0) overrides the input path; defaults to
    *             "data/words.txt" so existing invocations keep working.
    */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder().appName("wc").master("local[*]").getOrCreate()
    val sc: SparkContext = spark.sparkContext
    sc.setLogLevel("WARN")
    // Generalized: accept the input path as the first CLI argument,
    // falling back to the original hard-coded location.
    val inputPath: String = args.headOption.getOrElse("data/words.txt")
    try {
      val lines: RDD[String] = sc.textFile(inputPath)
      // flatMap: "a b c" -> (a, b, c)
      // map:     (a, b, c) -> ((a,1), (b,1), (c,1))
      // reduceByKey: sums the 1s per word -> (word, count)
      // collect() first: foreach(println) on an RDD runs on the executors,
      // so in a non-local deployment the output would never reach the
      // driver's stdout. Fine for a word-count-sized result set.
      lines
        .flatMap(_.split(" "))
        .map((_, 1))
        .reduceByKey(_ + _)
        .collect()
        .foreach(println)
    } finally {
      // Release Spark resources; the original never stopped the session.
      spark.stop()
    }
  }
}
